
package com.jlg.util.dom;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Locale;

import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Collects {@code <meta>} and {@code <base>} information from a DOM tree into an
 * {@link HTMLMetaTags} instance.
 * <p>
 * The tree is walked depth-first and the walk does not descend into any {@code <body>}
 * element. For every {@code <meta>} element that carries a {@code content} attribute:
 * <ul>
 * <li>a {@code name} attribute stores the pair in {@code getGeneralTags()}; when the name
 * is {@code robots} the directives {@code none}, {@code noindex}, {@code nofollow} and
 * {@code noarchive} are translated into the matching flag setters;</li>
 * <li>an {@code http-equiv} attribute stores the pair in {@code getHttpEquivTags()};
 * {@code pragma: no-cache} sets the no-cache flag and {@code refresh} records the delay
 * and the target URL.</li>
 * </ul>
 * A {@code <base href>} element is resolved against the current URL and stored as the
 * base href. All case folding uses {@link Locale#ROOT} so the result does not depend on
 * the JVM's default locale.
 *
 * @author huazhang
 * @since 2011-03-20
 */
public class HTMLMetaProcessor
{

	/** Key that introduces the target inside a refresh directive, e.g. {@code 5; url=/next}. */
	private static final String URL_KEY = "url=";

	public HTMLMetaProcessor()
	{
	}

	/**
	 * Calls {@code metaTags.reset()} and then fills {@code metaTags} from the tree rooted
	 * at {@code node}.
	 *
	 * @param metaTags receiver of everything that is found; must not be {@code null}
	 * @param node root of the (sub)tree to scan; a {@code null} node leaves
	 *        {@code metaTags} in its freshly reset state
	 * @param currURL URL of the document, used to resolve relative refresh and base
	 *        URLs; may be {@code null}, in which case only absolute URLs resolve
	 */
	public static final void getMetaTags(HTMLMetaTags metaTags, Node node, URL currURL)
	{
		metaTags.reset();
		getMetaTagsHelper(metaTags, node, currURL);
	}

	/**
	 * Recursive worker: handles {@code node} itself and then every child, except that a
	 * {@code <body>} element ends the descent on that branch.
	 */
	private static final void getMetaTagsHelper(HTMLMetaTags metaTags, Node node, URL currURL)
	{
		if (node == null)
			return;

		if (node.getNodeType() == Node.ELEMENT_NODE)
		{
			String tagName = node.getNodeName();
			// Nothing below <body> is inspected, so stop descending here.
			if ("body".equalsIgnoreCase(tagName))
				return;
			if ("meta".equalsIgnoreCase(tagName))
				processMeta(metaTags, node, currURL);
			else if ("base".equalsIgnoreCase(tagName))
				processBase(metaTags, node, currURL);
		}

		NodeList children = node.getChildNodes();
		if (children == null)
			return;
		int len = children.getLength();
		for (int i = 0; i < len; i++)
			getMetaTagsHelper(metaTags, children.item(i), currURL);
	}

	/** Records one {@code <meta>} element; elements without a {@code content} value are ignored. */
	private static void processMeta(HTMLMetaTags metaTags, Node meta, URL currURL)
	{
		NamedNodeMap attrs = meta.getAttributes();
		String content = attributeValue(attrs, "content");
		if (content == null)
			return;

		String name = attributeValue(attrs, "name");
		if (name != null)
			recordNamedTag(metaTags, name, content);

		String httpEquiv = attributeValue(attrs, "http-equiv");
		if (httpEquiv != null)
			recordHttpEquivTag(metaTags, httpEquiv, content, currURL);
	}

	/** Stores a {@code name}/{@code content} pair and interprets {@code robots} directives. */
	private static void recordNamedTag(HTMLMetaTags metaTags, String name, String content)
	{
		String key = name.toLowerCase(Locale.ROOT);
		metaTags.getGeneralTags().setProperty(key, content);
		if (!"robots".equals(key))
			return;

		String directives = content.toLowerCase(Locale.ROOT);
		if (directives.contains("none"))
		{
			metaTags.setNoIndex();
			metaTags.setNoFollow();
		}
		// "all" needs no handling: it sets none of the flags.
		if (directives.contains("noindex"))
			metaTags.setNoIndex();
		if (directives.contains("nofollow"))
			metaTags.setNoFollow();
		if (directives.contains("noarchive"))
			metaTags.setNoCache();
	}

	/** Stores an {@code http-equiv}/{@code content} pair and interprets {@code pragma} and {@code refresh}. */
	private static void recordHttpEquivTag(HTMLMetaTags metaTags, String httpEquiv, String content,
			URL currURL)
	{
		String key = httpEquiv.toLowerCase(Locale.ROOT);
		metaTags.getHttpEquivTags().setProperty(key, content);

		if ("pragma".equals(key))
		{
			if (content.toLowerCase(Locale.ROOT).contains("no-cache"))
				metaTags.setNoCache();
		} else if ("refresh".equals(key))
		{
			applyRefresh(metaTags, content, currURL);
		}
	}

	/**
	 * Interprets a refresh directive of the form {@code delay[; [url=]target]}.
	 * <p>
	 * Nothing is recorded when the delay is not an integer. Otherwise the delay, the
	 * refresh flag and the target are set; the target falls back to {@code currURL} when
	 * it is absent or cannot be resolved.
	 */
	private static void applyRefresh(HTMLMetaTags metaTags, String content, URL currURL)
	{
		int semicolon = content.indexOf(';');
		String delay = (semicolon == -1) ? content : content.substring(0, semicolon);

		int seconds;
		try
		{
			// Whitespace around the number ("5 ; url=...") is common and harmless.
			seconds = Integer.parseInt(delay.trim());
		} catch (NumberFormatException ignored)
		{
			// Without a usable delay this tag is skipped entirely, so it cannot
			// overwrite the target recorded by an earlier, valid refresh tag.
			return;
		}
		metaTags.setRefreshTime(seconds);
		metaTags.setRefresh(true);

		URL refreshUrl = null;
		if (semicolon != -1)
			refreshUrl = resolveRefreshTarget(content.substring(semicolon + 1), currURL);
		// Only a delay was given (or the target was unusable): refresh the same document.
		if (refreshUrl == null)
			refreshUrl = currURL;
		metaTags.setRefreshHref(refreshUrl);
	}

	/**
	 * Resolves the part of a refresh directive that follows the semicolon.
	 *
	 * @return the target as an absolute URL, or resolved against {@code currURL} when it
	 *         is not absolute, or {@code null} when neither attempt succeeds
	 */
	private static URL resolveRefreshTarget(String remainder, URL currURL)
	{
		// Search the original text case-insensitively instead of a lower-cased copy,
		// whose character offsets are not guaranteed to line up with the original.
		int keyStart = indexOfIgnoreCase(remainder, URL_KEY);
		// A directive without "url=" is treated as carrying just the target.
		String target = (keyStart == -1) ? remainder : remainder.substring(keyStart + URL_KEY.length());
		target = stripQuotes(target.trim());

		try
		{
			return new URL(target);
		} catch (MalformedURLException notAbsolute)
		{
			// Relative targets are widespread in practice, so retry against the page URL.
			try
			{
				return new URL(currURL, target);
			} catch (MalformedURLException unresolvable)
			{
				return null;
			}
		}
	}

	/** Records the {@code href} of a {@code <base>} element, resolved against {@code currURL} when one is given. */
	private static void processBase(HTMLMetaTags metaTags, Node base, URL currURL)
	{
		String href = attributeValue(base.getAttributes(), "href");
		if (href == null)
			return;

		URL url = null;
		try
		{
			url = (currURL == null) ? new URL(href) : new URL(currURL, href);
		} catch (MalformedURLException ignored)
		{
			// An unparsable base href is skipped; the base href is left untouched.
		}
		if (url != null)
			metaTags.setBaseHref(url);
	}

	/**
	 * Returns the value of the attribute called {@code name}, or {@code null} when the
	 * attribute is missing or has no value. An exact-case match is preferred; otherwise
	 * the last attribute whose name matches ignoring case is used.
	 */
	private static String attributeValue(NamedNodeMap attrs, String name)
	{
		if (attrs == null)
			return null;

		Node match = attrs.getNamedItem(name);
		if (match == null)
		{
			for (int i = 0; i < attrs.getLength(); i++)
			{
				Node candidate = attrs.item(i);
				if (name.equalsIgnoreCase(candidate.getNodeName()))
					match = candidate;
			}
		}
		return (match == null) ? null : match.getNodeValue();
	}

	/** Returns the index of the first case-insensitive occurrence of {@code needle} in {@code text}, or -1. */
	private static int indexOfIgnoreCase(String text, String needle)
	{
		int lastStart = text.length() - needle.length();
		for (int i = 0; i <= lastStart; i++)
		{
			if (text.regionMatches(true, i, needle, 0, needle.length()))
				return i;
		}
		return -1;
	}

	/** Removes one pair of matching single or double quotes that encloses {@code value}, if present. */
	private static String stripQuotes(String value)
	{
		if (value.length() >= 2)
		{
			char first = value.charAt(0);
			char last = value.charAt(value.length() - 1);
			if (first == last && (first == '"' || first == '\''))
				return value.substring(1, value.length() - 1).trim();
		}
		return value;
	}
}
